March 6, 2019
All source code and slides at [TBD]
Datasets for today are here: https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1
You can download them from R:
# Download the workshop datasets and attach them to the search path.
# mode = 'wb' requests a binary transfer: the URL ends in '?dl=1' rather than
# '.rda', so the file extension cannot be used to infer that the file is binary
# (this matters on Windows, where text-mode transfers rewrite line endings).
download.file(
  'https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1',
  'rladies_ggplot2_datasets.rda',
  mode = 'wb'
)
attach('rladies_ggplot2_datasets.rda')
For example: geom_bar() uses stat_count as its default stat.
Run this only if it's your first time using tidyverse on your computer.
# Install tidyverse only when it is not already available, so that re-running
# this script does not reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library("tidyverse")
# Download the workshop datasets into the working directory and attach them.
# mode = 'wb' requests a binary transfer: the URL ends in '?dl=1' rather than
# '.rda', so the file extension cannot be used to infer that the file is binary
# (this matters on Windows, where text-mode transfers rewrite line endings).
download.file(
  'https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1',
  'rladies_ggplot2_datasets.rda',
  mode = 'wb'
)
attach('rladies_ggplot2_datasets.rda')
# or download from dropbox link and then run
# load("<your path to the .rda file>")
# Four ways of writing the same scatter plot of n_rides against ride_date.

# 1. Data supplied to ggplot(); the aesthetic mapping supplied to the geom.
ggplot(data = daily_df) +
  geom_point(mapping = aes(x = ride_date, y = n_rides))

# 2. Data and mapping both supplied to ggplot(), with argument names spelled out.
ggplot(
  data = daily_df,
  mapping = aes(x = ride_date, y = n_rides)
) +
  geom_point()

# 3. Same as 2, relying on argument position instead of names.
ggplot(daily_df, aes(x = ride_date, y = n_rides)) +
  geom_point()

# 4. Data piped into ggplot() as its first argument.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()
# Scatter plot of n_rides by hour, written in the piped form.
# How else could we write this (using data = )?
hourly_df %>%
  ggplot(aes(x = hour, y = n_rides)) +
  geom_point()
# Color by days of the week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point()

# Size by number of riders.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, size = n_riders)) +
  geom_point()
# Color by weekend or not: the color aesthetic is the logical result of testing
# whether day_of_week is 'Sat' or 'Sun'.
daily_df %>%
  ggplot(
    aes(
      x = ride_date,
      y = n_rides,
      color = day_of_week %in% c('Sat', 'Sun')
    )
  ) +
  geom_point()
# Color by day type: first add a day_type column ('Weekend' when day_of_week is
# 'Sat' or 'Sun', 'Weekday' otherwise), then map color to that column.
daily_df %>%
  mutate(
    day_type = if_else(
      day_of_week %in% c('Sat', 'Sun'),
      'Weekend',
      'Weekday'
    )
  ) %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_type)) +
  geom_point()
# 'blue' inside aes(): the string is treated as a data value and mapped through
# the color scale, so it is not used as the literal point color.
daily_df %>%
  ggplot() +
  geom_point(aes(x = ride_date, y = n_rides, color = 'blue'))

# 'blue' outside aes(): the color is set directly on the geom.
daily_df %>%
  ggplot() +
  geom_point(aes(x = ride_date, y = n_rides), color = 'blue')
# Adds a day_type column ('Weekend' for 'Sat'/'Sun', 'Weekday' otherwise) and
# builds a plot with color mapped to it.
# NOTE(review): the last step is joined with `%>%` rather than `+`, so the
# ggplot object is passed to geom_point() as its first argument instead of a
# layer being added to the plot. Presumably this is an intentional illustration
# of the pipe-vs-plus mistake (the `+` form of this same plot appears earlier
# in this file) -- confirm before changing it.
daily_df %>%mutate(day_type = if_else(day_of_week %in% c('Sat', 'Sun'), 'Weekend', 'Weekday')) %>%
ggplot(aes(x = ride_date, y = n_rides, color = day_type)) %>%
geom_point()
Plot the number of unique routes per day over time, colored by day of week. (Use the n_unique_routes column.)
# Unique routes per day over time, colored by day of week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_unique_routes, color = day_of_week)) +
  geom_point()
# Points only.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()

# Points plus a line layer.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_line()

# Points plus a smoothed layer.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_smooth(span = .1) # try changing span
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points and lines, colored by day of week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  geom_line()

# One smoothed curve per day of week, without the standard-error band.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(span = .2, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Highlight weekdays (not 'Sat'/'Sun') with fewer than 200 rides: a large gray
# point is drawn first from the filtered rows, then the regular colored points
# for all rows are drawn on top.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(
    data = filter(
      daily_df,
      !(day_of_week %in% c('Sat', 'Sun')),
      n_rides < 200
    ),
    size = 5,
    color = 'gray'
  ) +
  geom_point()
# Weekdays (not 'Sat'/'Sun') with fewer than 100 rides.
low_weekdays_df <- daily_df %>%
  filter(
    !(day_of_week %in% c('Sat', 'Sun')),
    n_rides < 100
  )

# Note how we define the data here: the gray highlight points and the date
# labels take data = low_weekdays_df, while the final geom_point() uses the
# piped-in daily_df. Labels are placed 15 rides above their points.
daily_df %>%
  ggplot(
    aes(
      x = ride_date,
      y = n_rides,
      color = day_of_week,
      label = ride_date
    )
  ) +
  geom_point(data = low_weekdays_df, size = 5, color = 'gray') +
  geom_text(
    data = low_weekdays_df,
    aes(y = n_rides + 15),
    size = 2,
    color = 'black'
  ) +
  geom_point()
There are a number of other geoms besides geom_point(), geom_line(), geom_smooth(), and geom_text().
More info: https://ggplot2.tidyverse.org/reference/
# Points and lines of n_rides over time, one panel per day of week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_line() +
  facet_wrap(~ day_of_week)
# Bar chart of voter counts by age, with one panel per combination of
# race_code (rows) and gender_code (columns).
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == 'REP')
  ) %>%
  # Keep only the listed gender/race codes and drop the invalid-age bucket.
  filter(
    gender_code %in% c('F', 'M'),
    race_code %in% c('W', 'B', 'A'),
    age != 'Age < 18 Or Invalid Birth Date'
  ) %>%
  ggplot(aes(x = age, y = n_voters)) +
  geom_bar(stat = 'identity') +
  facet_grid(race_code ~ gender_code)
# Same bar chart of voter counts by age per race_code x gender_code panel, but
# with scales = 'free_y' passed to facet_grid().
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == 'REP')
  ) %>%
  # Keep only the listed gender/race codes and drop the invalid-age bucket.
  filter(
    gender_code %in% c('F', 'M'),
    race_code %in% c('W', 'B', 'A'),
    age != 'Age < 18 Or Invalid Birth Date'
  ) %>%
  ggplot(aes(x = age, y = n_voters)) +
  geom_bar(stat = 'identity') +
  facet_grid(race_code ~ gender_code, scales = 'free_y')
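Note: it would be better to reshape the data to long format with gather() (superseded by pivot_longer() in current tidyr) rather than adding a separate layer for each count column.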
# Voter counts (default color) and counts of party == 'REP' (red) by age, as
# points joined by lines, with one panel per race_code x gender_code.
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == 'REP')
  ) %>%
  # summarize() only drops the last grouping level, so the result is still
  # grouped by race_code and gender_code. Remove the grouping so that age_cat
  # below is numbered over all rows rather than separately within each group
  # (per-group numbering shifts whenever a group is missing an age bracket).
  ungroup() %>%
  filter(
    gender_code %in% c('F', 'M'),
    race_code %in% c('W', 'B', 'A'),
    age != 'Age < 18 Or Invalid Birth Date'
  ) %>%
  # Numeric position of each age bracket, used as x for the line layers.
  # factor() keeps only the brackets that remain after filtering.
  mutate(age_cat = as.numeric(factor(age))) %>%
  ggplot(aes(x = age, y = n_voters)) +
  geom_point() +
  geom_line(aes(x = age_cat)) +
  geom_line(aes(x = age_cat, y = n_rep), color = 'red') +
  geom_point(aes(y = n_rep), color = 'red') +
  facet_grid(race_code ~ gender_code, scales = 'free_y') +
  # Make every panel's y axis include 0.
  expand_limits(y = 0)
# Smoothed rides over time per day of week, with a title, axis labels
# (x label left blank) and a renamed color legend.
ggplot(
  data = daily_df,
  mapping = aes(x = ride_date, y = n_rides, color = day_of_week)
) +
  geom_smooth(span = .2, se = FALSE) +
  ggtitle('Transit Rides over time by Day of Week') +
  xlab('') +
  ylab('# of Transit Rides') +
  scale_color_discrete('Day of Week')
# The same colored scatter plot with three different y scales.

# Reversed y axis.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_reverse()

# Square-root y axis.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_sqrt()

# Continuous y axis with breaks at 0, 200 and 500.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_continuous(breaks = c(0, 200, 500))
# The same colored scatter plot with three different theme settings.

# theme_bw().
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_bw()

# theme_dark().
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_dark()

# Default theme with the x-axis tick labels rotated by 90 degrees.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))
To really master themes:
Better than the regular documentation:
http://docs.ggplot2.org/current/
Don't forget the ggplot2 cheatsheet!
https://www.rstudio.com/wp-content/uploads/2015/03/ggplot2-cheatsheet.pdf
Check it out!
Wednesday, March 20th, 6pm
#tidytuesday Work Group. Topic/format suggestions?
Thanks to Elaine McVey for sharing her slides from a previous R-Ladies RTP meetup!